libname home "/scratch/baruch/"; 

options compress=yes; 

%let timer = %sysfunc(datetime());
options nonotes;

%macro NASDAQ_before2015(year1, year2);

proc datasets nolist lib=work; delete _out; quit;

%do date = &year1 %to &year2;
%if %sysfunc(exist(taq.nbbo_&date.)) %then %do;

proc datasets nolist lib=work; 
 delete _1 _2_1 _3_1 _2_2 _3_2 _3 _oneday; 
run; quit;

data _1; set taq.nbbo_&date.;
 keep DATE TIME symbol BBEXLIST BB BOEXLIST BO;
 where time >= ('9:25:00.000000000't) and time <= ('16:05:00.000000000't) 
       and (BBEXLIST contains "Q" or BBEXLIST contains "D" or BBEXLIST contains "T")
       and (BOEXLIST contains "Q" or BOEXLIST contains "D" or BOEXLIST contains "T");
run;


********************************************************************************************;
********************************************************************************************;
***** time ended with 0 ********************************************************************;

%let t = %sysfunc(hms(9,30,0)); %let interval = 5*60; %let next = 10*60;
data _2_1; set _1; format time_m TIME20.9; 
 %do %until (&t. > %sysfunc(hms(16,00,0)));
  %let start = %sysevalf(&t. - &interval.); %let end = %sysevalf(&t. + &interval.);   
  if time >= &start. and time < &end. then do; time_m = &t.; diff_sec =  abs(time - time_m); end;
  %let t = %sysevalf(&t. + &next.);
 %end;
run;

proc sql;
 create table _3_1 as
 select distinct date, time_m as time, time as time_m, symbol, BB as BEST_BID, BBEXLIST as BEST_BIDEX, 
                 BO as BEST_ASK, BOEXLIST as BEST_ASKEX, (BO-BB)/((BO+BB)/2) as spread
 from _2_1
 group by date, symbol, time_m
 having diff_sec = min(diff_sec)
 order by date, symbol, time_m;
quit;

proc sort data = _3_1 nodupkey; by date symbol time; run;


********************************************************************************************;
********************************************************************************************;
***** time ended with 5 ********************************************************************;

%let t = %sysfunc(hms(9,35,0)); %let interval = 5*60; %let next = 10*60;
data _2_2; set _1; format time_m TIME20.9; 
 %do %until (&t. > %sysfunc(hms(15,55,0)));
  %let start = %sysevalf(&t. - &interval.); %let end = %sysevalf(&t. + &interval.);   
  if time >= &start. and time < &end. then do; time_m = &t.; diff_sec =  abs(time - time_m); end;
  %let t = %sysevalf(&t. + &next.);
 %end;
run;

proc sql;
 create table _3_2 as
 select distinct date, time_m as time, time as time_m, symbol, BB as BEST_BID, BBEXLIST as BEST_BIDEX, 
                 BO as BEST_ASK, BOEXLIST as BEST_ASKEX, (BO-BB)/((BO+BB)/2) as spread
 from _2_2
 group by date, symbol, time_m
 having diff_sec = min(diff_sec)
 order by date, symbol, time_m;
quit;

proc sort data = _3_2 nodupkey; by date symbol time; run;


********************************************************************************************;
********************************************************************************************;
***** merge together 0 with 5 **************************************************************;

data _3; set _3_1 _3_2; run;
proc sort data = _3; by date symbol time; run;

%let ym = %substr(&date., 1, 6);

proc sql;
	create table _oneday as
	select distinct a.*, b.cusip
	from _3 as a left join taq.mast_&ym. as b
	on a.symbol=b.symbol 
	where a.date >= b.fdate
	order by date, symbol, time;
quit;


proc datasets nolist lib=work; append base=_out data=_oneday force; run; quit;
%end; %end;

%mend;

%NASDAQ_before2015(year1=19970101, year2=20040630);

/* save data to home dictionary */
data home.NASDAQ_1997to200406; set _out; where time is not missing; run;

options notes;
data _null_;
dur = datetime() - &timer;
put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
run; 
